import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the Boston crime incident export directly from the project's GitHub repo.
link = (
    'https://raw.githubusercontent.com/sohaibbinmohsin/'
    'dm-project-boston-crime-analysis/main/Dataset.csv'
)
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk.
# NOTE(review): the file is decoded as Latin-1, yet category strings printed
# further down contain 'Â\xa0' sequences, which is how UTF-8 non-breaking
# spaces look when read as Latin-1 - confirm the file's real encoding.
df = pd.read_csv(link, low_memory=False, encoding="ISO-8859-1")
# Preview the first ten records.
df.head(10)
| INCIDENT_NUMBER | OFFENSE_CODE | OFFENSE_CODE_GROUP | OFFENSE_DESCRIPTION | DISTRICT | REPORTING_AREA | SHOOTING | OCCURRED_ON_DATE | YEAR | MONTH | DAY_OF_WEEK | HOUR | STREET | Lat | Long | Location | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | I192068249 | 2647 | Other | THREATS TO DO BODILY HARM | B2 | 280 | NaN | 8/28/15 10:20 | 2015 | 8 | Friday | 10 | WASHINGTON ST | 42.330119 | -71.084251 | (42.33011862, -71.08425106) |
| 1 | I192061894 | 1106 | Confidence Games | FRAUD - CREDIT CARD / ATM FRAUD | C11 | 356 | NaN | 8/20/15 0:00 | 2015 | 8 | Thursday | 0 | CHARLES ST | 42.300605 | -71.061268 | (42.30060543, -71.06126785) |
| 2 | I192038828 | 1107 | Fraud | FRAUD - IMPERSONATION | A1 | 172 | NaN | 11/2/15 12:24 | 2015 | 11 | Monday | 12 | ALBANY ST | 42.334288 | -71.072395 | (42.33428841, -71.07239518) |
| 3 | I192008877 | 1107 | Fraud | FRAUD - IMPERSONATION | E18 | 525 | NaN | 7/31/15 10:00 | 2015 | 7 | Friday | 10 | WINGATE RD | 42.237009 | -71.129566 | (42.23700950, -71.12956606) |
| 4 | I182090828 | 1102 | Fraud | FRAUD - FALSE PRETENSE / SCHEME | D4 | 159 | NaN | 12/1/15 12:00 | 2015 | 12 | Tuesday | 12 | UPTON ST | 42.342432 | -71.072258 | (42.34243222, -71.07225766) |
| 5 | I182090458 | 1107 | Fraud | FRAUD - IMPERSONATION | C11 | 336 | NaN | 12/4/15 0:00 | 2015 | 12 | Friday | 0 | NORTON ST | 42.306265 | -71.068646 | (42.30626521, -71.06864556) |
| 6 | I182081063 | 1107 | Fraud | FRAUD - IMPERSONATION | E18 | 478 | NaN | 12/1/15 12:25 | 2015 | 12 | Tuesday | 12 | RUSKINDALE RD | 42.268897 | -71.108137 | (42.26889666, -71.10813750) |
| 7 | I182074094 | 2629 | Harassment | HARASSMENT | B2 | 258 | NaN | 9/14/15 9:31 | 2015 | 9 | Monday | 9 | COLUMBIA RD | 42.315142 | -71.067047 | (42.31514179, -71.06704709) |
| 8 | I182066132 | 2629 | Harassment | HARASSMENT | B3 | 455 | NaN | 7/31/15 23:27 | 2015 | 7 | Friday | 23 | RADCLIFFE ST | 42.300202 | -71.078354 | (42.30020194, -71.07835353) |
| 9 | I182061268 | 3201 | Property Lost | PROPERTY - LOST | NaN | NaN | 6/15/15 0:00 | 2015 | 6 | Monday | 0 | BERNARD | -1.000000 | -1.000000 | (-1.00000000, -1.00000000) |
# Show the dtype pandas inferred for every column.
df.dtypes
INCIDENT_NUMBER object OFFENSE_CODE int64 OFFENSE_CODE_GROUP object OFFENSE_DESCRIPTION object DISTRICT object REPORTING_AREA object SHOOTING object OCCURRED_ON_DATE object YEAR int64 MONTH int64 DAY_OF_WEEK object HOUR int64 STREET object Lat float64 Long float64 Location object dtype: object
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()
| OFFENSE_CODE | YEAR | MONTH | HOUR | Lat | Long | |
|---|---|---|---|---|---|---|
| count | 567623.000000 | 567623.000000 | 567623.000000 | 567623.000000 | 545093.000000 | 5.450930e+05 |
| mean | 2333.235131 | 2017.965038 | 6.624245 | 13.024004 | 36.494844 | -6.129973e+01 |
| std | 1195.613580 | 1.897510 | 3.333956 | 6.333570 | 14.585577 | 2.448707e+01 |
| min | 100.000000 | 2015.000000 | 1.000000 | 0.000000 | -1.000000 | -7.117867e+01 |
| 25% | 1102.000000 | 2016.000000 | 4.000000 | 9.000000 | 42.284070 | -7.109272e+01 |
| 50% | 3005.000000 | 2018.000000 | 7.000000 | 14.000000 | 42.316617 | -7.107240e+01 |
| 75% | 3201.000000 | 2020.000000 | 9.000000 | 18.000000 | 42.344513 | -7.105576e+01 |
| max | 99999.000000 | 2022.000000 | 12.000000 | 23.000000 | 42.395042 | 5.250000e-08 |
# Report the size of the dataset: one row per record, one column per attribute.
num_rows, num_cols = df.shape
print('number of records: ', num_rows)
print('number of attributes: ', num_cols)
number of records: 567623 number of attributes: 16
# Count NaN cells per column once, then reuse the result for the grand total.
missing_per_column = df.isna().sum()
print('Number of missing values:\n', missing_per_column)
print('Total missing values: ', missing_per_column.sum())
Number of missing values: INCIDENT_NUMBER 0 OFFENSE_CODE 0 OFFENSE_CODE_GROUP 214370 OFFENSE_DESCRIPTION 0 DISTRICT 3421 REPORTING_AREA 0 SHOOTING 351798 OCCURRED_ON_DATE 0 YEAR 0 MONTH 0 DAY_OF_WEEK 0 HOUR 0 STREET 11886 Lat 22530 Long 22530 Location 0 dtype: int64 Total missing values: 626535
# Replace missing SHOOTING entries with 0.
# presumably a missing flag means "no shooting" - TODO confirm against the data dictionary
df['SHOOTING']= df['SHOOTING'].fillna(0)
# Count NaN entries in REPORTING_AREA; isna() does not flag blank strings,
# so a result of 0 only rules out true NaN values.
df['REPORTING_AREA'].isna().sum()
0
# Drop records that have no coordinates or no district.
df.dropna(subset=['Lat', 'Long', 'DISTRICT'], inplace=True)
# Fill the remaining gaps column by column. A row-wise bfill(axis='columns')
# would copy whatever sits in the next column to the right into each gap
# (e.g. the latitude into a missing STREET) and would cast every column to
# object dtype, which hides the numeric columns from later numeric analysis.
# A missing offense group falls back to the record's own offense description.
df['OFFENSE_CODE_GROUP'] = df['OFFENSE_CODE_GROUP'].fillna(df['OFFENSE_DESCRIPTION'])
# A missing street gets an explicit placeholder instead of a borrowed value.
df['STREET'] = df['STREET'].fillna('UNKNOWN')
df.shape
(541990, 16)
# Inspect the last rows of the cleaned frame.
df.tail()
| INCIDENT_NUMBER | OFFENSE_CODE | OFFENSE_CODE_GROUP | OFFENSE_DESCRIPTION | DISTRICT | REPORTING_AREA | SHOOTING | OCCURRED_ON_DATE | YEAR | MONTH | DAY_OF_WEEK | HOUR | STREET | Lat | Long | Location | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 567618 | 222004519 | 617 | LARCENY THEFT FROM BUILDING | LARCENY THEFT FROM BUILDING | A1 | 0 | 1/21/22 8:05 | 2022 | 1 | Friday | 8 | ARLINGTON ST | 0.0 | 0.0 | (0, 0) | |
| 567619 | 222004517 | 520 | BURGLARY - RESIDENTIAL | BURGLARY - RESIDENTIAL | D4 | 0 | 1/21/22 7:01 | 2022 | 1 | Friday | 7 | WASHINGTON ST | 0.0 | 0.0 | (0, 0) | |
| 567620 | 222004516 | 3126 | WARRANT ARREST - OUTSIDE OF BOSTON WARRANT | WARRANT ARREST - OUTSIDE OF BOSTON WARRANT | D4 | 167 | 0 | 1/21/22 7:25 | 2022 | 1 | Friday | 7 | HARRISON AVE | 0.0 | 0.0 | (0, 0) |
| 567621 | 122004001 | 3115 | INVESTIGATE PERSON | INVESTIGATE PERSON | E13 | 912 | 0 | 1/19/22 9:30 | 2022 | 1 | Wednesday | 9 | HEATH ST | 0.0 | 0.0 | (0, 0) |
| 567622 | 149210376 | 3126 | WARRANT ARREST - OUTSIDE OF BOSTON WARRANT | WARRANT ARREST - OUTSIDE OF BOSTON WARRANT | B2 | 0 | 1/2/22 0:00 | 2022 | 1 | Sunday | 0 | WASHINGTON ST & ROXBURY ST\nBOSTON MA 02119\n... | 42.3296 | -71.08462 | (42.32959998129787, -71.08462000476281) |
# Sanity check: True would mean at least one NaN survived the cleaning steps.
has_nan = df.isna().any().any()
print('test for NAN after filling the missing values: ', has_nan)
test for NAN after filling the missing values: False
# List every distinct offense group label, in order of first appearance.
df['OFFENSE_CODE_GROUP'].unique()
array(['Other', 'Confidence Games', 'Fraud', 'Harassment',
'Criminal Harassment', 'Larceny', 'Property Lost',
'Auto Theft Recovery', 'Larceny From Motor Vehicle',
'Investigate Person', 'Police Service Incidents',
'Warrant Arrests', 'Auto Theft', 'Restraining Order Violations',
'Property Found', 'Robbery', 'Firearm Violations',
'Medical Assistance', 'Simple Assault', 'Missing Person Located',
'Missing Person Reported', 'Embezzlement',
'Motor Vehicle Accident Response', 'Residential Burglary',
'Counterfeiting', 'Prisoner Related Incidents', 'Vandalism',
'Investigate Property', 'License Plate Related Incidents',
'Aggravated Assault', 'Offenses Against Child / Family',
'Firearm Discovery', 'Verbal Disputes', 'Search Warrants',
'Violations', 'Landlord/Tenant Disputes', 'Commercial Burglary',
'Evading Fare', 'Towed', 'Other Burglary', 'License Violation',
'Drug Violation', 'Fire Related Reports', 'Disorderly Conduct',
'Operating Under the Influence', 'Ballistics',
'Property Related Damage', 'Assembly or Gathering Violations',
'Recovered Stolen Property', 'Homicide', 'Liquor Violation',
'Aircraft', 'Bomb Hoax', 'Service', 'Gambling', 'Prostitution',
'Explosives', 'Arson', 'Phone Call Complaints',
'Harbor Related Incidents', 'INVESTIGATE PERSON',
'Burglary - No Property Taken', 'HOME INVASION', 'Manslaughter',
'HUMAN TRAFFICKING', 'HUMAN TRAFFICKING - INVOLUNTARY SERVITUDE',
'Biological Threat', 'Migrated Report - Other Part III',
'Migrated Report - Auto Theft',
'Migrated Report - Death Investigation',
'Migrated Report - Kidnapping',
'KIDNAPPING - ENTICING OR ATTEMPTED',
'SICK/INJURED/MEDICAL - PERSON', 'VERBAL DISPUTE',
'ASSAULT SIMPLE - BATTERY',
'Migrated Report - Aggravated Assault/Aggravated Assault & Battery',
'Migrated Report - Assault/Assault & Battery',
'VAL - OPERATING WITHOUT LICENSE',
'M/V - LEAVING SCENE - PROPERTY DAMAGE', 'WARRANT ARREST',
'LARCENY THEFT OF MV PARTS & ACCESSORIES',
'THREATS TO DO BODILY HARM',
'MISSING PERSON - NOT REPORTED - LOCATED', 'TOWED MOTOR VEHICLE',
'LARCENY ALL OTHERS', 'M/V ACCIDENT - PROPERTY Â\xa0DAMAGE',
'M/V ACCIDENT - OTHER', 'DRUGS - OTHER', 'VANDALISM',
'LARCENY THEFT OF BICYCLE', 'INVESTIGATE PROPERTY',
'MISSING PERSON - LOCATED', 'HARASSMENT', 'PROPERTY - FOUND',
'LARCENY THEFT FROM MV - NON-ACCESSORY', 'PROPERTY - LOST',
'TRESPASSING', 'AUTO THEFT', 'VAL - OPERATING AFTER REV/SUSP.',
'ANNOYING AND ACCOSTING', 'M/V ACCIDENT - OTHER CITY VEHICLE',
'ASSAULT - AGGRAVATED', 'MISSING PERSON',
'PROPERTY - ACCIDENTAL DAMAGE', 'FRAUD - FALSE PRETENSE / SCHEME',
'LARCENY SHOPLIFTING', 'BURGLARY - RESIDENTIAL - FORCE',
'FRAUD - CREDIT CARD / ATM FRAUD',
'M/V ACCIDENT - PERSONAL INJURY',
'SERVICE TO OTHER PD INSIDE OF MA.',
'BURGLARY - RESIDENTIAL - NO FORCE',
'VAL - OPERATING UNREG/UNINS Â\xa0CAR',
'DRUGS - SICK ASSIST - HEROIN', 'Migrated Report - Fraud',
'LARCENY PURSE SNATCH - NO FORCE ',
'M/V ACCIDENT INVOLVING PEDESTRIAN - INJURY',
'BURGLARY - RESIDENTIAL - ATTEMPT',
'FIRE REPORT - HOUSE, BUILDING, ETC.',
'ASSAULT - AGGRAVATED - BATTERY', 'DISORDERLY CONDUCT',
'ROBBERY - OTHER', 'BURGLARY - OTHER - FORCE',
'NOISY PARTY/RADIO-NO ARREST', 'SICK/INJURED/MEDICAL - POLICE',
'LARCENY THEFT FROM BUILDING', 'SUDDEN DEATH', 'ROBBERY - STREET',
'ASSAULT - SIMPLE', 'DRUGS - POSS CLASS A - HEROIN, ETC. ',
'M/V ACCIDENT - INVOLVING BICYCLE - NO INJURY',
'DRUGS - POSS CLASS A - INTENT TO MFR DIST DISP',
'DRUGS - SALE / MANUFACTURING', 'LIQUOR - DRINKING IN PUBLIC',
'INJURY BICYCLE NO M/V INVOLVED',
'WEAPON - FIREARM - CARRYING / POSSESSING, ETC',
'PROPERTY - STOLEN THEN RECOVERED', 'FORGERY / COUNTERFEITING',
'BALLISTICS EVIDENCE/FOUND', 'FRAUD - IMPERSONATION',
'M/V ACCIDENT - INVOLVING PEDESTRIAN - NO INJURY',
'STOLEN PROPERTY - BUYING / RECEIVING / POSSESSING',
'FUGITIVE FROM JUSTICE',
'DRUGS - POSS CLASS C - INTENT TO MFR DIST DISP',
'AUTO THEFT - LEASED/RENTED VEHICLE',
'BURGLARY - COMMERICAL - FORCE', 'Car Jacking Firearm-Gas Station',
'SEARCH WARRANT', 'DRUGS - POSS CLASS B - INTENT TO MFR DIST DISP',
'LICENSE PREMISE VIOLATION', 'LANDLORD - TENANT SERVICE',
'VAL - VIOLATION OF AUTO LAW - OTHER',
'M/V - LEAVING SCENE - PERSONAL INJURY',
'AUTO THEFT - MOTORCYCLE / SCOOTER',
'M/V ACCIDENT - INVOLVING Â\xa0BICYCLE - INJURY',
'M/V PLATES - LOST', 'PROPERTY - MISSING',
'RECOVERED - MV RECOVERED IN BOSTON (STOLEN OUTSIDE BOSTON)',
'M/V ACCIDENT - POLICE VEHICLE',
'INVESTIGATION FOR ANOTHER AGENCY',
'OPERATING UNDER THE INFLUENCE ALCOHOL', 'FRAUD - WELFARE',
'FIREARM/WEAPON - FOUND OR CONFISCATED', 'EVADING FARE',
'DEATH INVESTIGATION', 'DRUGS - POSS CLASS B - COCAINE, ETC.',
'VAL - OPERATING W/O AUTHORIZATION LAWFUL',
'DRUGS - POSS CLASS D - INTENT TO MFR DIST DISP',
'DRUGS - SICK ASSIST - OTHER NARCOTIC',
'VIOLATION - CITY ORDINANCE', 'ROBBERY - COMMERCIAL',
'BURGLARY - COMMERICAL - NO FORCE',
'ANIMAL CONTROL - DOG BITES - ETC.', 'GRAFFITI',
'WEAPON - FIREARM - OTHER VIOLATION', 'EXTORTION OR BLACKMAIL',
'REPORT AFFECTING OTHER DEPTS.', 'FIRE REPORT - CAR, BRUSH, ETC.',
'Car Jacking Firearm-Bank', 'FRAUD - WIRE',
'DRUGS - CLASS B TRAFFICKING OVER 18 GRAMS',
'BURGLARY - OTHER - ATTEMPT', 'WEAPON - OTHER - OTHER VIOLATION',
'EMBEZZLEMENT', 'WEAPON - OTHER - CARRYING / POSSESSING, ETC',
'DRUGS - CLASS A TRAFFICKING OVER 18 GRAMS',
'INTIMIDATING WITNESS', 'Car Jacking Firearm-Miscellaneous',
'HARBOR INCIDENT / VIOLATION', 'ANIMAL INCIDENTS',
'DEMONSTRATIONS/RIOT', 'DRUGS - CLASS D TRAFFICKING OVER 50 GRAMS',
'BURGLARY - COMMERICAL - ATTEMPT', 'ROBBERY - HOME INVASION',
'SUICIDE / SUICIDE ATTEMPT', 'OTHER OFFENSE',
'DRUGS - POSS CLASS C', 'DRUGS - POSS CLASS D',
'DISTURBING THE PEACE', 'DRUGS - POSSESSION',
'DRUGS - SICK ASSIST - OTHER HARMFUL DRUG',
'PROSTITUTION - SOLICITING', 'BURGLARY - OTHER - NO FORCE',
'LARCENY PICK-POCKET', 'VIOLATION - HAWKER AND PEDDLER',
'MURDER, NON-NEGLIGIENT MANSLAUGHTER', 'TRUANCY / RUNAWAY',
'BIOLOGICAL THREATS', 'EXPLOSIVES - POSSESSION OR USE',
'PROPERTY - LOST THEN LOCATED',
'OPERATING UNDER THE INFLUENCE DRUGS', 'DRUGS - POSS CLASS E',
'ARSON', 'AIRCRAFT INCIDENTS',
'SERVICE TO OTHER PD OUTSIDE OF MA.',
'Car Jacking Firearm-Highway', 'POSSESSION OF BURGLARIOUS TOOLS',
'DRUGS - POSSESSION OF DRUG PARAPHANALIA', 'ROBBERY - CAR JACKING',
'ANIMAL ABUSE', 'FIRE REPORT/ALARM - FALSE',
'LIQUOR LAW VIOLATION', 'AFFRAY',
'DANGEROUS OR HAZARDOUS CONDITION', 'ROBBERY - BANK',
'DRUGS - POSS CLASS E - INTENT TO MFR DIST DISP',
'EXPLOSIVES - TURNED IN OR FOUND', 'CHINS',
'PROTECTIVE CUSTODY / SAFEKEEPING', 'PROPERTY - CONCEALING LEASED',
'CRIMINAL HARASSMENT',
'DRUGS - CONSP TO VIOL CONTROLLED SUBSTANCE', 'BOMB THREAT',
'Larceny-Theft From Buildings', 'RECOVERED STOLEN PLATE',
'OBSCENE PHONE CALLS', 'WEAPON - FIREARM - SALE / TRAFFICKING',
'KIDNAPPING/CUSTODIAL KIDNAPPING',
'PRISONER - SUICIDE / SUICIDE ATTEMPT', 'FIREARM/WEAPON - LOST',
'NOISY PARTY/RADIO-ARREST', 'MANSLAUGHTER - VEHICLE - NEGLIGENCE',
'PROSTITUTION', 'FIREARM/WEAPON - ACCIDENTAL INJURY / DEATH',
'LARCENY THEFT FROM COIN-OP MACHINE', 'CONSPIRACY EXCEPT DRUG LAW',
'HARASSMENT/ CRIMINAL HARASSMENT', 'PROPERTY - LOST/ MISSING',
'SERVICE TO OTHER AGENCY', 'BURGLARY - COMMERICAL',
'LANDLORD - TENANT', 'M/V ACCIDENT - PROPERTY DAMAGE',
'M/V ACCIDENT - INVOLVING PEDESTRIAN - INJURY', 'ROBBERY',
'VAL - VIOLATION OF AUTO LAW', 'BURGLARY - RESIDENTIAL',
'M/V ACCIDENT - INVOLVING BICYCLE - INJURY',
'RECOVERED - MV RECOVERED IN BOSTON (STOLEN IN BOSTON) MUST BE SUPPLEMENTAL',
'BREAKING AND ENTERING (B&E) MOTOR VEHICLE',
'Migrated Report - Motor Vehicle Crash',
'Migrated Report - Vandalism/Destruction of Property',
'Migrated Report - Other Larceny',
'Migrated Report - Investigate Person',
'Migrated Report - Other Part II',
'Migrated Report - Affray/Disturbing the Peace/Disorderly Conduct',
'Migrated Report - Burglary/Breaking and Entering',
'Migrated Report - Larceny From MV',
'Migrated Report - Drugs - Possession/Manufacturing/Distribute',
'Migrated Report - Investigate Property',
'Migrated Report - Auto Law Violation',
'Migrated Report - Robbery',
'Migrated Report - Counterfeiting/Forgery',
'Migrated Report - Weapons Violation',
'Migrated Report - Injured/Medical/Sick Assist',
'Migrated Report - Criminal Homicide',
'Migrated Report - Stolen Property',
'Migrated Report - Other Auto Theft',
'Migrated Report - Embezzlement', 'FIRE REPORT', 'SICK ASSIST',
'VIOLATION - RESTRAINING ORDER (NO ARREST)',
'DRUGS - POSSESSION/ SALE/ MANUFACTURING/ USE',
'WARRANT ARREST - OUTSIDE OF BOSTON WARRANT',
'LIQUOR/ALCOHOL - DRINKING IN PUBLIC',
'SICK ASSIST - DRUG RELATED ILLNESS',
'WEAPON VIOLATION - CARRY/ POSSESSING/ SALE/ TRAFFICKING/ OTHER',
'ANIMAL INCIDENTS (DOG BITES, LOST DOG, ETC)',
'LARCENY PURSE SNATCH - NO FORCE',
'WARRANT ARREST - BOSTON WARRANT (MUST BE SUPPLEMENTAL)',
'OPERATING UNDER THE INFLUENCE (OUI) ALCOHOL',
'BREAKING AND ENTERING (B&E) MOTOR VEHICLE (NO PROPERTY STOLEN)',
'VIOLATION - HARASSMENT PREVENTION ORDER',
'OPERATING UNDER THE INFLUENCE (OUI) DRUGS',
'DISTURBING THE PEACE/ DISORDERLY CONDUCT/ GATHERING CAUSING ANNOYANCE/ NOISY PAR',
'CHILD REQUIRING ASSISTANCE (FOMERLY CHINS)',
'VIOL. OF RESTRAINING ORDER W ARREST', 'DRUNKENNESS',
'Evidence Tracker Incidents',
'PROSTITUTION - ASSISTING OR PROMOTING',
'KIDNAPPING/CUSTODIAL KIDNAPPING/ ABDUCTION',
'Justifiable Homicide', 'MANSLAUGHTER - NEGLIGENCE'], dtype=object)
import folium
from folium import Marker
# Map of robberies reported between 09:00 and 17:59, one marker per incident.
m_2 = folium.Map(location=[42.32,-71.0589], tiles='cartodbpositron', zoom_start=12)
daytime_robberies = df[((df.OFFENSE_CODE_GROUP == 'Robbery') & (df.HOUR.isin(range(9,18))))]
# Skip placeholder coordinates: the data contains (-1, -1) and (0, 0) style
# positions, which would otherwise be drawn far away from the city.
has_real_coords = (
    pd.to_numeric(daytime_robberies['Lat'], errors='coerce').abs().gt(1)
    & pd.to_numeric(daytime_robberies['Long'], errors='coerce').abs().gt(1)
)
plottable_robberies = daytime_robberies[has_real_coords]
# Add points to the map; the popup shows the police district of the incident.
for lat, lon, district in zip(plottable_robberies['Lat'],
                              plottable_robberies['Long'],
                              plottable_robberies['DISTRICT']):
    Marker([lat, lon], popup=district).add_to(m_2)
# Display the map
m_2
import folium
from folium import Marker
# Map of incidents whose offense group is 'DRUNKENNESS', one marker per incident.
m_3 = folium.Map(location=[42.32,-71.0589], tiles='cartodbpositron', zoom_start=12)
drunk = df[(df.OFFENSE_CODE_GROUP == 'DRUNKENNESS')]
# Skip placeholder coordinates: the data contains (-1, -1) and (0, 0) style
# positions, which would otherwise be drawn far away from the city.
has_real_coords = (
    pd.to_numeric(drunk['Lat'], errors='coerce').abs().gt(1)
    & pd.to_numeric(drunk['Long'], errors='coerce').abs().gt(1)
)
plottable_drunk = drunk[has_real_coords]
# Add points to the map; the popup shows the police district of the incident.
for lat, lon, district in zip(plottable_drunk['Lat'],
                              plottable_drunk['Long'],
                              plottable_drunk['DISTRICT']):
    Marker([lat, lon], popup=district).add_to(m_3)
# Display the map
m_3
# Map of incidents whose offense group is exactly 'Harassment'.
# NOTE(review): the group column also holds 'HARASSMENT' and
# 'HARASSMENT/ CRIMINAL HARASSMENT'; those spellings are not matched here -
# confirm whether they should be included.
m_4 = folium.Map(location=[42.32,-71.0589], tiles='cartodbpositron', zoom_start=12)
Harr = df[(df.OFFENSE_CODE_GROUP == 'Harassment')]
# Skip placeholder coordinates: the data contains (-1, -1) and (0, 0) style
# positions, which would otherwise be drawn far away from the city.
has_real_coords = (
    pd.to_numeric(Harr['Lat'], errors='coerce').abs().gt(1)
    & pd.to_numeric(Harr['Long'], errors='coerce').abs().gt(1)
)
plottable_harassment = Harr[has_real_coords]
# Add points to the map (no popup for this layer).
for lat, lon in zip(plottable_harassment['Lat'], plottable_harassment['Long']):
    Marker([lat, lon]).add_to(m_4)
# Display the map
m_4
from sklearn.preprocessing import LabelEncoder

# Replace each listed text/categorical column with integer codes, in place.
columns = ['INCIDENT_NUMBER', 'OFFENSE_CODE_GROUP', 'DISTRICT', 'SHOOTING', 'OCCURRED_ON_DATE', 'DAY_OF_WEEK', 'Location', 'STREET']
# Keep one fitted encoder per column so a code can be turned back into its
# original label later, e.g. label_encoders['DISTRICT'].inverse_transform(...).
# Refitting a single shared encoder would only remember the last column.
label_encoders = {}
for col in columns:
    gle = LabelEncoder()
    # Cast to str first so columns holding mixed value types encode consistently.
    labels = gle.fit_transform(df[col].astype(str))
    df[col] = labels
    label_encoders[col] = gle
df.head()
| INCIDENT_NUMBER | OFFENSE_CODE | OFFENSE_CODE_GROUP | OFFENSE_DESCRIPTION | DISTRICT | REPORTING_AREA | SHOOTING | OCCURRED_ON_DATE | YEAR | MONTH | DAY_OF_WEEK | HOUR | STREET | Lat | Long | Location | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 437999 | 2647 | 133 | THREATS TO DO BODILY HARM | 6 | 280 | 0 | 287775 | 2015 | 8 | 0 | 10 | 2610 | 42.330119 | -71.084251 | 16151 |
| 1 | 432189 | 1106 | 247 | FRAUD - CREDIT CARD / ATM FRAUD | 8 | 356 | 0 | 275894 | 2015 | 8 | 4 | 0 | 5975 | 42.300605 | -71.061268 | 4377 |
| 2 | 410464 | 1107 | 14 | FRAUD - IMPERSONATION | 0 | 172 | 0 | 392471 | 2015 | 11 | 1 | 12 | 3691 | 42.334288 | -71.072395 | 18511 |
| 3 | 382241 | 1107 | 14 | FRAUD - IMPERSONATION | 2 | 525 | 0 | 248106 | 2015 | 7 | 0 | 10 | 3362 | 42.237009 | -71.129566 | 2447 |
| 4 | 361918 | 1102 | 14 | FRAUD - FALSE PRETENSE / SCHEME | 11 | 159 | 0 | 179 | 2015 | 12 | 5 | 12 | 2062 | 42.342432 | -71.072258 | 22398 |
# Pearson correlation matrix of the numeric columns.
# The numeric columns are selected explicitly: older pandas silently dropped
# non-numeric columns inside corr(), while pandas 2.x raises on them.
# NOTE(review): for label-encoded nominal columns the coefficient depends on
# the arbitrary order of the codes - interpret those entries with care.
pcorr = df.select_dtypes(include='number').corr(method='pearson')
print(pcorr)
INCIDENT_NUMBER OFFENSE_CODE_GROUP DISTRICT SHOOTING \
INCIDENT_NUMBER 1.000000 -0.035080 -0.000502 0.005544
OFFENSE_CODE_GROUP -0.035080 1.000000 0.000846 -0.017243
DISTRICT -0.000502 0.000846 1.000000 0.007623
SHOOTING 0.005544 -0.017243 0.007623 1.000000
OCCURRED_ON_DATE -0.071468 -0.005393 -0.002008 0.002724
DAY_OF_WEEK 0.001444 0.003476 0.003925 -0.002974
STREET 0.004485 -0.000509 0.035064 -0.003109
Location -0.204209 0.000389 -0.202385 -0.020227
OCCURRED_ON_DATE DAY_OF_WEEK STREET Location
INCIDENT_NUMBER -0.071468 0.001444 0.004485 -0.204209
OFFENSE_CODE_GROUP -0.005393 0.003476 -0.000509 0.000389
DISTRICT -0.002008 0.003925 0.035064 -0.202385
SHOOTING 0.002724 -0.002974 -0.003109 -0.020227
OCCURRED_ON_DATE 1.000000 0.002525 -0.000428 0.039201
DAY_OF_WEEK 0.002525 1.000000 -0.001278 -0.003158
STREET -0.000428 -0.001278 1.000000 0.023391
Location 0.039201 -0.003158 0.023391 1.000000
# Heatmap of the Pearson correlation matrix.
plt.matshow(pcorr)
# Label both axes with the column names and add a colour scale; without them
# the cells cannot be tied to a column pair or to a coefficient value.
plt.xticks(range(len(pcorr.columns)), pcorr.columns, rotation=90)
plt.yticks(range(len(pcorr.index)), pcorr.index)
plt.colorbar()
plt.show()
# Kendall rank correlation matrix of the numeric columns.
# The numeric columns are selected explicitly: older pandas silently dropped
# non-numeric columns inside corr(), while pandas 2.x raises on them.
kcorr = df.select_dtypes(include='number').corr(method='kendall')
print(kcorr)
INCIDENT_NUMBER OFFENSE_CODE_GROUP DISTRICT SHOOTING \
INCIDENT_NUMBER 1.000000 -0.028901 0.000345 0.011805
OFFENSE_CODE_GROUP -0.028901 1.000000 -0.002290 -0.016327
DISTRICT 0.000345 -0.002290 1.000000 -0.002069
SHOOTING 0.011805 -0.016327 -0.002069 1.000000
OCCURRED_ON_DATE -0.013665 -0.002947 -0.000490 0.001863
DAY_OF_WEEK 0.001053 0.001727 0.002270 -0.002449
STREET 0.002955 0.000127 0.024361 -0.000807
Location -0.152945 0.004905 -0.098287 -0.021427
OCCURRED_ON_DATE DAY_OF_WEEK STREET Location
INCIDENT_NUMBER -0.013665 0.001053 0.002955 -0.152945
OFFENSE_CODE_GROUP -0.002947 0.001727 0.000127 0.004905
DISTRICT -0.000490 0.002270 0.024361 -0.098287
SHOOTING 0.001863 -0.002449 -0.000807 -0.021427
OCCURRED_ON_DATE 1.000000 0.001931 -0.000311 0.028930
DAY_OF_WEEK 0.001931 1.000000 -0.000918 -0.002309
STREET -0.000311 -0.000918 1.000000 0.013075
Location 0.028930 -0.002309 0.013075 1.000000
# Heatmap of the Kendall correlation matrix.
plt.matshow(kcorr)
# Label both axes with the column names and add a colour scale; without them
# the cells cannot be tied to a column pair or to a coefficient value.
plt.xticks(range(len(kcorr.columns)), kcorr.columns, rotation=90)
plt.yticks(range(len(kcorr.index)), kcorr.index)
plt.colorbar()
plt.show()
# Frequency of every offense code, most common first.
offense_code_counts = df['OFFENSE_CODE'].value_counts()
print(offense_code_counts)
3115 36406
3006 29493
3831 27947
1402 25470
3114 21427
...
315 1
112 1
1105 1
2500 1
122 1
Name: OFFENSE_CODE, Length: 252, dtype: int64
From the output above, we can see that the incident with offense code 3115 occurred most often. So now we will look up its description.
# Find the description(s) recorded for the most frequent offense code.
# The code is taken from the data rather than typed in by hand, so this keeps
# working if the dataset changes.
top_code = df['OFFENSE_CODE'].value_counts().idxmax()
is_top_code = df['OFFENSE_CODE'] == top_code
# Row labels of all matching incidents.
ind = df.index[is_top_code].tolist()
# Vectorised selection replaces a Python-level loop over every matching row.
offense_desc = df.loc[is_top_code, 'OFFENSE_DESCRIPTION'].tolist()
# A set collapses the repeated descriptions into the distinct ones.
print('Most occuring incidents is: ', set(offense_desc))
Most occuring incidents is: {'INVESTIGATE PERSON'}
# Number of incidents per district, largest first, as a one-column table.
# NOTE(review): DISTRICT is overwritten with integer label codes by the
# encoding step earlier in this file; if that step has run, the index holds
# codes rather than district names - confirm the intended execution order.
district_incident_table = (
    df['DISTRICT']
    .value_counts()
    .rename('Incidents')
    .rename_axis('District')
    .to_frame()
)
district_incident_table
| Incidents | |
|---|---|
| District | |
| B2 | 87461 |
| C11 | 75149 |
| D4 | 73402 |
| B3 | 63335 |
| A1 | 62367 |
| C6 | 42396 |
| D14 | 36743 |
| E18 | 31529 |
| E13 | 31350 |
| E5 | 24673 |
| A7 | 23643 |
| A15 | 11762 |
| External | 392 |
# Bar chart of incident counts per district.
fig = plt.figure()
# The axes rectangle [0, 0, 1, 1] spans the whole figure area.
ax = fig.add_axes([0, 0, 1, 1])
ax.bar(
    x=district_incident_table.index,
    height=district_incident_table['Incidents'],
)
plt.show()
# Box plot of the Location column.
# The series is passed as x= explicitly: from seaborn 0.12 the only positional
# argument is `data`, so a positional series is rejected or misinterpreted.
# Alternative view: sns.boxplot(x=df['YEAR'])
sns.boxplot(x=df['Location'])
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. FutureWarning
<matplotlib.axes._subplots.AxesSubplot at 0x7f82dd384350>